Data analysis for:



Larger prokaryotes grow faster only at high temperatures

Dylan Padilla, School of Life Sciences, Arizona State University, Tempe, AZ 85287, USA.




Library and Session info

library(ape)
library(geiger)
library(phytools)
library(phylolm)
library(lattice)
library(nlme)
library(raster)
library(rphylopic)
library(scales)
library(vioplot)


sessionInfo()
> R version 4.3.2 (2023-10-31)
> Platform: x86_64-apple-darwin20 (64-bit)
> Running under: macOS Sonoma 14.2.1
> 
> Matrix products: default
> BLAS:   /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRblas.0.dylib 
> LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
> 
> locale:
> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
> 
> time zone: America/Phoenix
> tzcode source: internal
> 
> attached base packages:
> [1] stats4    stats     graphics  grDevices utils     datasets  methods  
> [8] base     
> 
> other attached packages:
>  [1] phylolm_2.6.2               xtable_1.8-4               
>  [3] xaringanExtra_0.7.0         xaringan_0.29              
>  [5] vioplot_0.4.0               zoo_1.8-12                 
>  [7] sm_2.2-5.7.1                scales_1.3.0               
>  [9] rsvg_2.6.0                  rmarkdown_2.25             
> [11] raster_3.6-26               sp_2.1-3                   
> [13] rphylopic_1.3.0             nlme_3.1-164               
> [15] lattice_0.21-9              knitr_1.45                 
> [17] kableExtra_1.3.4            icons_0.2.0                
> [19] geiger_2.0.11               phytools_2.1-1             
> [21] maps_3.4.2                  extrafont_0.19             
> [23] emo_0.0.0.9000              DiagrammeRsvg_0.1          
> [25] DiagrammeR_1.0.11           DESeq2_1.42.0              
> [27] SummarizedExperiment_1.32.0 Biobase_2.62.0             
> [29] MatrixGenerics_1.14.0       matrixStats_1.2.0          
> [31] GenomicRanges_1.54.1        GenomeInfoDb_1.38.5        
> [33] IRanges_2.36.0              S4Vectors_0.40.2           
> [35] BiocGenerics_0.48.1         ape_5.7-1                  
> 
> loaded via a namespace (and not attached):
>   [1] RColorBrewer_1.1-3      rstudioapi_0.15.0       jsonlite_1.8.8         
>   [4] subplex_1.8             magrittr_2.0.3          farver_2.1.1           
>   [7] zlibbioc_1.48.0         vctrs_0.6.5             RCurl_1.98-1.14        
>  [10] base64enc_0.1-3         terra_1.7-71            webshot_0.5.5          
>  [13] htmltools_0.5.7         S4Arrays_1.2.0          curl_5.2.0             
>  [16] deSolve_1.40            SparseArray_1.2.3       sass_0.4.8             
>  [19] parallelly_1.36.0       bslib_0.6.1             htmlwidgets_1.6.4      
>  [22] cachem_1.0.8            lubridate_1.9.3         igraph_1.6.0           
>  [25] lifecycle_1.0.4         iterators_1.0.14        pkgconfig_2.0.3        
>  [28] Matrix_1.6-1.1          R6_2.5.1                fastmap_1.1.1          
>  [31] future_1.33.1           GenomeInfoDbData_1.2.11 digest_0.6.34          
>  [34] numDeriv_2016.8-1.1     colorspace_2.1-0        clusterGeneration_1.3.8
>  [37] fansi_1.0.6             timechange_0.3.0        httr_1.4.7             
>  [40] abind_1.4-5             compiler_4.3.2          doParallel_1.0.17      
>  [43] optimParallel_1.0-2     BiocParallel_1.36.0     highr_0.10             
>  [46] Rttf2pt1_1.3.12         MASS_7.3-60             rappdirs_0.3.3         
>  [49] DelayedArray_0.28.0     scatterplot3d_0.3-44    tools_4.3.2            
>  [52] extrafontdb_1.0         future.apply_1.11.1     glue_1.7.0             
>  [55] quadprog_1.5-8          grid_4.3.2              generics_0.1.3         
>  [58] gtable_0.3.4            xml2_1.3.6              utf8_1.2.4             
>  [61] XVector_0.42.0          foreach_1.5.2           pillar_1.9.0           
>  [64] stringr_1.5.1           dplyr_1.1.4             tidyselect_1.2.0       
>  [67] locfit_1.5-9.8          pbapply_1.7-2           V8_4.4.1               
>  [70] grImport2_0.3-1         svglite_2.1.3           xfun_0.42              
>  [73] expm_0.999-9            visNetwork_2.1.2        stringi_1.8.3          
>  [76] yaml_2.3.8              evaluate_0.23           codetools_0.2-19       
>  [79] tibble_3.2.1            cli_3.6.2               systemfonts_1.0.5      
>  [82] jquerylib_0.1.4         munsell_0.5.0           Rcpp_1.0.12            
>  [85] globals_0.16.2          coda_0.19-4             png_0.1-8              
>  [88] XML_3.99-0.16           parallel_4.3.2          ggplot2_3.4.4          
>  [91] assertthat_0.2.1        jpeg_0.1-10             bitops_1.0-7           
>  [94] listenv_0.9.0           phangorn_2.11.1         viridisLite_0.4.2      
>  [97] mvtnorm_1.2-4           purrr_1.0.2             crayon_1.5.2           
> [100] combinat_0.0-8          rlang_1.1.3             fastmatch_1.1-4        
> [103] rvest_1.0.3             mnormt_2.1.1




Beginning of the analyses







Figure 1.




#png("tree.png", height = 7, width = 7, units = "in", res = 360)


col.br <- setNames(c("purple", "orange"), c("Archaea", "Bacteria"))

plotTree(tree.vol, ftype = "i", lwd = 3, mar = c(3.5, 1, 1, 3))

par(new = TRUE, col = "transparent")

painted <- paintSubTree(tree.vol, 52, "Archaea" ,"0")
plotSimmap(painted, col.br, ftype = "i", lwd = 3, mar = c(3.5, 1, 1, 3))

par(new = TRUE, col = "transparent")

painted <- paintSubTree(tree.vol, 31, "Bacteria")
plotSimmap(painted, col.br, ftype = "i", lwd = 3, mar = c(3.5, 1, 1, 3))

par(new = TRUE, col = "black")

legend("bottomleft", legend = c("Archaea", "Bacteria"), lwd = 3, col = col.br, bty = "n")
axisPhylo(1, line = -0.1)
mtext("Time (mya)", side = 1, line = 2, at = 2000)


obj <- get("last_plot.phylo", envir = .PlotPhyloEnv)
x2 <- runif(100, obj$x.lim[2] + 10, obj$x.lim[2] + 50)
spp <- gsub("(_).*","", tree.vol$tip.label)[-c(3, 6, 9, 10, 12, 14, 16, 18, 21, 24, 28)]
spp[7] <- "Cyanobacteria"
spp[8] <- "Mycoplasma_genitalium"
spp[10] <- "Pleurocapsa fuliginosa"


col.fill <- c(rep("orange", 13), rep("purple", 7))
col.bor <- c(rep("black", 13), rep("red", 7))
idx <- 1
for(i in spp){
    x2
    uuid <- get_uuid(name = i, n = 1)
    img <- get_phylopic(uuid = uuid)
    nodes <- sapply(i, grep, x = tree.vol$tip.label)
    for(j in nodes){
        add_phylopic_base(img = img, x = sample(x2, 1), y = j, ysize = 1, color = col.bor[idx], fill = col.fill[idx])

    }
    idx = idx + 1
}

uuid <- get_uuid(name = "Chroococcus turgidus", n = 1)
img <- get_phylopic(uuid = uuid)
add_phylopic_base(img = img, x = 7400, y = 10, ysize = 1, color = "black", fill = "orange")

uuid <- get_uuid(name = "Pleurocapsa fuliginosa", n = 1)
img <- get_phylopic(uuid = uuid)
add_phylopic_base(img = img, x = 7400, y = 17, ysize = 1, color = "black", fill = "orange")

uuid <- get_uuid(name = "Fimbriimonas ginsengisoli", n = 1)
img <- get_phylopic(uuid = uuid)
add_phylopic_base(img = img, x = 7400, y = 11, ysize = 1, color = "green", fill = "orange")

#dev.off()




Figure 4




tmp.lower.dat <- tmp.lower.dat[tmp.lower.dat$d1_up < 4, ] ## Removing potential outliers
tmp.op.dat <- tmp.op.dat[tmp.op.dat$d1_up < 4, ] ## Removing potential outliers
tmp.upper.dat <- tmp.upper.dat[tmp.upper.dat$d1_up < 4, ] ## Removing potential outliers


#png("figure4.png", height = 7, width = 7, units = "in", res = 360)


layout(matrix(c(1, 1, 2, 2, 3, 3,
                1, 1, 2, 2, 3, 3,
                4, 4, 5, 5, 6, 6,
                4, 4, 5, 5, 6, 6), nrow = 4, ncol = 6, byrow = TRUE))



## lower


tmp.lower.dat <- tmp.lower.dat[tmp.lower.dat$d1_up < 4, ] ## Removing potential outliers


model6.1 <- lm(d1_up ~ log10(tmp.op), data = tmp.lower.dat)


## IC

SSX <- sum(round((log10(tmp.lower.dat$tmp.op) - mean(log10(tmp.lower.dat$tmp.op)))^2), 2)
s2 <- var(tmp.lower.dat$d1_up)
n <- length(tmp.lower.dat$d1_up)
x <- seq(min(log10(tmp.lower.dat$tmp.op)), max(log10(tmp.lower.dat$tmp.op)), length = 30)
m.x <- mean(round(log10(tmp.lower.dat$tmp.op), 1))
se <- sqrt(s2*((1/n) + (((x - m.x)^2)/SSX)))
is <- qt(0.975, df = n - 2)
ii <- qt(0.025, df = n - 2)
ic.s <- se*is
ic.i <- se*ii
upper.i <- (coef(model6.1)[1] + coef(model6.1)[2]*x) + ic.s
lower.i <- (coef(model6.1)[1] + coef(model6.1)[2]*x) + ic.i

par(mar = c(6.4, 4, 2, 0), mgp = c(2.8, 1, 0))
    
plot(d1_up ~ log10(tmp.op), data = tmp.lower.dat, ylab = expression(paste("Cell diameter ", log[10], sep = " ")(mu*m)), xlab = expression(paste("Lower temperature \u00B0C")~(log[10])), las = 1, pch = 21, cex = 1.2, type = "n")

grid(nx = NULL, ny = NULL, col = alpha("lightgray", 0.5), lwd = 1, lty = 2)

par(new = TRUE)

cols <- setNames(c("purple", "orange"), levels(as.factor(tmp.lower.dat$kingdom)))

plot(d1_up ~ log10(tmp.op), data = tmp.lower.dat, xlab = "", ylab = "", las = 1, pch = 21, col = cols[tmp.lower.dat$kingdom], bg = cols[tmp.lower.dat$kingdom], cex = 0.8, axes = FALSE)

#lines(x = x, y = (coef(model6.1)[1] + coef(model6.1)[2]*x), lwd = 2, col = "black")
#polygon(c(rev(x), x), c(rev(lower.i), upper.i), border = FALSE, col = alpha("gold", 0.3))

legend("topleft", legend = levels(as.factor((tmp.lower.dat$kingdom))), pch = 16, col = c("purple", "orange"), bg = c("purple", "orange"), bty = "n", cex = 0.8)
legend("topright", legend = paste("n = ", length(tmp.lower.dat$species)), bty = "n", cex = 0.8)

mtext("A", side = 2, line = 2.6, at = 3.8, las = 1, font = 2)


## Cell size and temp opt

tmp.op.dat <- tmp.op.dat[tmp.op.dat$d1_up < 4, ] ## Removing potential outliers

#model6 <- gls(d1_up ~ log10(tmp.op), correlation = corBrownian(phy = tree.tmp, form = ~species), data = tmp.dat, method = "ML")

#model6 <- lm(d1_up ~ log10(tmp.op), data = tmp.dat)
model6 <- lm(d1_up ~ log10(tmp.op), data = tmp.op.dat)


## IC

SSX <- sum(round((log10(tmp.op.dat$tmp.op) - mean(log10(tmp.op.dat$tmp.op)))^2), 2)
s2 <- var(tmp.op.dat$d1_up)
n <- length(tmp.op.dat$d1_up)
x <- seq(min(log10(tmp.op.dat$tmp.op)), max(log10(tmp.op.dat$tmp.op)), length = 30)
m.x <- mean(round(log10(tmp.op.dat$tmp.op), 1))
se <- sqrt(s2*((1/n) + (((x - m.x)^2)/SSX)))
is <- qt(0.975, df = n - 2)
ii <- qt(0.025, df = n - 2)
ic.s <- se*is
ic.i <- se*ii
upper.i <- (coef(model6)[1] + coef(model6)[2]*x) + ic.s
lower.i <- (coef(model6)[1] + coef(model6)[2]*x) + ic.i


par(mar = c(6.4, 2.3, 2, 0.1))

plot(d1_up ~ log10(tmp.op), data = tmp.op.dat, ylab = " ",  xlab = expression(paste("Optimum temperature \u00B0C")~(log[10])), las = 1, pch = 21, cex = 1.2, type = "n")

grid(nx = NULL, ny = NULL, col = alpha("lightgray", 0.5), lwd = 1, lty = 2)

par(new = TRUE)

cols <- setNames(c("purple", "orange"), levels(as.factor(tmp.op.dat$kingdom)))

plot(d1_up ~ log10(tmp.op), data = tmp.op.dat, xlab = "", ylab = "", las = 1, pch = 21, col = cols[tmp.op.dat$kingdom], bg = cols[tmp.op.dat$kingdom], cex = 0.8, axes = FALSE)

#lines(x = x, y = (coef(model6)[1] + coef(model6)[2]*x), lwd = 2, col = "black")
#polygon(c(rev(x), x), c(rev(lower.i), upper.i), border = FALSE, col = alpha("gold", 0.3))

legend("topleft", legend = levels(as.factor((tmp.op.dat$kingdom))), pch = 16, col = c("purple", "orange"), bg = c("purple", "orange"), bty = "n", cex = 0.8)
legend(x = 0.7, y = 3.07, legend = paste("n = ", length(tmp.op.dat$species)), bty = "n", cex = 0.8)





## upper


tmp.upper.dat <- tmp.upper.dat[tmp.upper.dat$d1_up < 4, ] ## Removing potential outliers


model6.3 <- lm(d1_up ~ log10(tmp.op), data = tmp.upper.dat)


## IC

SSX <- sum(round((log10(tmp.upper.dat$tmp.op) - mean(log10(tmp.upper.dat$tmp.op)))^2), 2)
s2 <- var(tmp.upper.dat$d1_up)
n <- length(tmp.upper.dat$d1_up)
x <- seq(min(log10(tmp.upper.dat$tmp.op)), max(log10(tmp.upper.dat$tmp.op)), length = 30)
m.x <- mean(round(log10(tmp.upper.dat$tmp.op), 1))
se <- sqrt(s2*((1/n) + (((x - m.x)^2)/SSX)))
is <- qt(0.975, df = n - 2)
ii <- qt(0.025, df = n - 2)
ic.s <- se*is
ic.i <- se*ii
upper.i <- (coef(model6.3)[1] + coef(model6.3)[2]*x) + ic.s
lower.i <- (coef(model6.3)[1] + coef(model6.3)[2]*x) + ic.i


par(mar = c(6.4, 2.3, 2, 0.2))

plot(d1_up ~ log10(tmp.op), data = tmp.upper.dat, ylab = " ", xlab = expression(paste("Upper temperature \u00B0C")~(log[10])), las = 1, pch = 21, cex = 1.2, type = "n")

grid(nx = NULL, ny = NULL, col = alpha("lightgray", 0.5), lwd = 1, lty = 2)

par(new = TRUE)

cols <- setNames(c("purple", "orange"), levels(as.factor(tmp.upper.dat$kingdom)))

plot(d1_up ~ log10(tmp.op), data = tmp.upper.dat, xlab = "", ylab = "", las = 1, pch = 21, col = cols[tmp.upper.dat$kingdom], bg = cols[tmp.upper.dat$kingdom], cex = 0.8, axes = FALSE)

lines(x = x, y = (coef(model6.3)[1] + coef(model6.3)[2]*x), lwd = 2, col = "black")
polygon(c(rev(x), x), c(rev(lower.i), upper.i), border = FALSE, col = alpha("gold", 0.3))

legend("topleft", legend = levels(as.factor((tmp.upper.dat$kingdom))), pch = 16, col = c("purple", "orange"), bg = c("purple", "orange"), bty = "n", cex = 0.8)
legend(x = 0.6, y = 3, legend = paste("n = ", length(tmp.upper.dat$species)), bty = "n", cex = 0.8)



## GROWTH


## lower


model7.1 <- lm(log10(doubling_h) ~ log10(tmp.op), data = tmp.lower.dat.growth)

## IC

SSX <- sum(round((log10(tmp.lower.dat.growth$tmp.op) - mean(log10(tmp.lower.dat.growth$tmp.op)))^2), 2)
s2 <- var(log10(tmp.lower.dat.growth$doubling_h))
n <- length(log10(tmp.lower.dat.growth$doubling_h))
x <- seq(min(log10(tmp.lower.dat.growth$tmp.op)), max(log10(tmp.lower.dat.growth$tmp.op)), length = 30)
m.x <- mean(round(log10(tmp.lower.dat.growth$tmp.op), 1))
se <- sqrt(s2*((1/n) + (((x - m.x)^2)/SSX)))
is <- qt(0.975, df = n - 2)
ii <- qt(0.025, df = n - 2)
ic.s <- se*is
ic.i <- se*ii
upper.i <- (coef(model7.1)[1] + coef(model7.1)[2]*x) + ic.s
lower.i <- (coef(model7.1)[1] + coef(model7.1)[2]*x) + ic.i

par(mar = c(6.4, 4, 2, 0), mgp = c(2.8, 1, 0))

plot(log10(doubling_h) ~ log10(tmp.op), data = tmp.lower.dat.growth, ylab = expression(paste("Doubling ", log[10], sep = " ")(h)), xlab = expression(paste("Lower temperature \u00B0C ")~(log[10])), las = 1, pch = 21, bg = alpha("black", 0.3), cex = 1.2, type = "n")

grid(nx = NULL, ny = NULL, col = alpha("lightgray", 0.5), lwd = 1, lty = 2)

par(new = TRUE)

cols3 <- c("purple", "orange")[as.numeric(as.factor(tmp.lower.dat.growth$kingdom))]

plot(log10(doubling_h) ~ log10(tmp.op), data = tmp.lower.dat.growth, xlab = "", ylab = "", las = 1, pch = 21, col = cols3, bg = cols3, cex = 0.8, axes = FALSE)

lines(x = x, y = (coef(model7.1)[1] + coef(model7.1)[2]*x), lty = 2, lwd = 2, col = "black")
polygon(c(rev(x), x), c(rev(lower.i), upper.i), border = FALSE, col = alpha("gold", 0.3))

legend("bottomleft", legend = levels(as.factor((tmp.lower.dat.growth$kingdom))), pch = 16, col = c("purple", "orange"), bg = c("purple", "orange"), bty = "n", cex = 0.8)

mtext("B", side = 2, line = 2.6, at = 3, las = 1, font = 2)
legend("topright", legend = paste("n = ", length(tmp.lower.dat.growth$species)), bty = "n", cex = 0.8)


## Doubling and optimum temp

#model7 <- gls(log(doubling_h) ~ log(tmp.op), correlation = corBrownian(phy = tree.tmp2, form = ~species), data = tmp.dat2, method = "ML")

model7 <- lm(log10(doubling_h) ~ log10(tmp.op), data = tmp.op.dat.growth)

## IC

SSX <- sum(round((log10(tmp.op.dat.growth$tmp.op) - mean(log10(tmp.op.dat.growth$tmp.op)))^2), 2)
s2 <- var(log10(tmp.op.dat.growth$doubling_h))
n <- length(log10(tmp.op.dat.growth$doubling_h))
x <- seq(min(log10(tmp.op.dat.growth$tmp.op)), max(log10(tmp.op.dat.growth$tmp.op)), length = 30)
m.x <- mean(round(log10(tmp.op.dat.growth$tmp.op), 1))
se <- sqrt(s2*((1/n) + (((x - m.x)^2)/SSX)))
is <- qt(0.975, df = n - 2)
ii <- qt(0.025, df = n - 2)
ic.s <- se*is
ic.i <- se*ii
upper.i <- (coef(model7)[1] + coef(model7)[2]*x) + ic.s
lower.i <- (coef(model7)[1] + coef(model7)[2]*x) + ic.i


par(mar = c(6.4, 2.3, 2, 0.1))

plot(log10(doubling_h) ~ log10(tmp.op), data = tmp.op.dat.growth, ylab = expression(paste("Doubling ", log[10], sep = " ")(h)), xlab = expression(paste("Optimum temperature \u00B0C ")~(log[10])), las = 1, pch = 21, bg = alpha("black", 0.3), cex = 1.2, type = "n")

grid(nx = NULL, ny = NULL, col = alpha("lightgray", 0.5), lwd = 1, lty = 2)

par(new = TRUE)

cols3 <- c("purple", "orange")[as.numeric(as.factor(tmp.op.dat.growth$kingdom))]

plot(log10(doubling_h) ~ log10(tmp.op), data = tmp.op.dat.growth, xlab = "", ylab = "", las = 1, pch = 21, col = cols3, bg = cols3, cex = 0.8, axes = FALSE)

lines(x = x, y = (coef(model7)[1] + coef(model7)[2]*x), lty = 2, lwd = 2, col = "black")
polygon(c(rev(x), x), c(rev(lower.i), upper.i), border = FALSE, col = alpha("gold", 0.3))

legend("bottomleft", legend = levels(as.factor((tmp.op.dat.growth$kingdom))), pch = 16, col = c("purple", "orange"), bg = c("purple", "orange"), bty = "n", cex = 0.8)
legend(x = 0.92, y = 0.12, legend = paste("n = ", length(tmp.op.dat.growth$species)), bty = "n", cex = 0.8)


## upper


model7.2 <- lm(log10(doubling_h) ~ log10(tmp.op), data = tmp.upper.dat.growth)

## IC

SSX <- sum(round((log10(tmp.upper.dat.growth$tmp.op) - mean(log10(tmp.upper.dat.growth$tmp.op)))^2), 2)
s2 <- var(log10(tmp.upper.dat.growth$doubling_h))
n <- length(log10(tmp.upper.dat.growth$doubling_h))
x <- seq(min(log10(tmp.upper.dat.growth$tmp.op)), max(log10(tmp.upper.dat.growth$tmp.op)), length = 30)
m.x <- mean(round(log10(tmp.upper.dat.growth$tmp.op), 1))
se <- sqrt(s2*((1/n) + (((x - m.x)^2)/SSX)))
is <- qt(0.975, df = n - 2)
ii <- qt(0.025, df = n - 2)
ic.s <- se*is
ic.i <- se*ii
upper.i <- (coef(model7.2)[1] + coef(model7.2)[2]*x) + ic.s
lower.i <- (coef(model7.2)[1] + coef(model7.2)[2]*x) + ic.i

par(mar = c(6.4, 2.3, 2, 0.2))

plot(log10(doubling_h) ~ log10(tmp.op), data = tmp.upper.dat.growth, ylab = expression(paste("Doubling ", log[10], sep = " ")(h)), xlab = expression(paste("Upper temperature \u00B0C ")~(log[10])), las = 1, pch = 21, bg = alpha("black", 0.3), cex = 1.2, type = "n")

grid(nx = NULL, ny = NULL, col = alpha("lightgray", 0.5), lwd = 1, lty = 2)

par(new = TRUE)

cols3 <- c("purple", "orange")[as.numeric(as.factor(tmp.upper.dat.growth$kingdom))]

plot(log10(doubling_h) ~ log10(tmp.op), data = tmp.upper.dat.growth, xlab = "", ylab = "", las = 1, pch = 21, col = cols3, bg = cols3, cex = 0.8, axes = FALSE)

lines(x = x, y = (coef(model7.2)[1] + coef(model7.2)[2]*x), lty = 2, lwd = 2, col = "black")
polygon(c(rev(x), x), c(rev(lower.i), upper.i), border = FALSE, col = alpha("gold", 0.3))

legend("bottomleft", legend = levels(as.factor((tmp.upper.dat.growth$kingdom))), pch = 16, col = c("purple", "orange"), bg = c("purple", "orange"), bty = "n", cex = 0.8)
legend(x = 0.6, y = -0.1, legend = paste("n = ", length(tmp.upper.dat.growth$species)), bty = "n", cex = 0.8)

#dev.off()




Model diagnosis




model6.3 <- lm(d1_up ~ log10(tmp.op), data = tmp.upper.dat)
summary(model6.3)
> 
> Call:
> lm(formula = d1_up ~ log10(tmp.op), data = tmp.upper.dat)
> 
> Residuals:
>      Min       1Q   Median       3Q      Max 
> -0.80822 -0.33283 -0.13363  0.09665  2.31893 
> 
> Coefficients:
>               Estimate Std. Error t value Pr(>|t|)   
> (Intercept)    0.04328    0.29301   0.148  0.88267   
> log10(tmp.op)  0.61525    0.19930   3.087  0.00219 **
> ---
> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> 
> Residual standard error: 0.5281 on 328 degrees of freedom
> Multiple R-squared:  0.02823, Adjusted R-squared:  0.02527 
> F-statistic:  9.53 on 1 and 328 DF,  p-value: 0.002194
layout(matrix(c(1, 1, 2, 2,
                1, 1, 2, 2,
                3, 3, 4, 4,
                3, 3, 4, 4), nrow = 4, ncol = 4, byrow = TRUE))


par(las = 1)
plot(model6.3)




Figure 2




mod <- lm(log10(doubling_h) ~ log10(volume), data = v.dat)
#summary(mod)

pg.mod <- gls(log10(doubling_h) ~ log10(volume), correlation = corBrownian(phy = tree.vol, form = ~species), data = vol.dat, method = "ML")
#summary(pg.mod)


layout(matrix(c(0, 0, 0, 0,
                1, 1, 2, 2,
                1, 1, 2, 2,
                0, 0, 0, 0), nrow = 4, ncol = 4, byrow = TRUE))

## IC

SSX <- sum(round((log10(vol.dat$volume) - mean(log10(vol.dat$volume)))^2), 2)
s2 <- var(log10(vol.dat$doubling_h))
n <- length(vol.dat$doubling_h)
x <- seq(min(log10(vol.dat$volume)), max(log10(vol.dat$volume)), length = length(vol.dat$species))
m.x <- mean(round(log(vol.dat$volume), 1))
se <- sqrt(s2*((1/n) + (((x - m.x)^2)/SSX)))
is <- qt(0.975, df = n - 2)
ii <- qt(0.025, df = n - 2)
ic.s <- se*is
ic.i <- se*ii
upper.i <- (coef(pg.mod)[1] + coef(pg.mod)[2]*x) + ic.s
lower.i <- (coef(pg.mod)[1] + coef(pg.mod)[2]*x) + ic.i



cols <- setNames(c("purple", "orange"), levels(as.factor(vol.dat$kingdom)))
vol.dat$kingdom <- as.factor(vol.dat$kingdom)

plot(log10(doubling_h) ~ log10(volume), data = vol.dat, type = "n", pch = 21, las = 1, ylab = expression(paste("Doubling ", log[10], sep = " ")*(h)), xlab = expression(paste("Cell volume ", log[10], sep = " ")(mu*m^3)))

grid(nx = NULL, ny = NULL, col = alpha("lightgray", 0.5), lwd = 1, lty = 2)

par(new = TRUE)

plot(log10(doubling_h) ~ log10(volume), data = vol.dat, type = "p", pch = 21, col = cols[vol.dat$kingdom], bg = cols[vol.dat$kingdom], las = 1, ylab = "", xlab = "", axes = FALSE)

lines(x, y = (coef(pg.mod)[1] + coef(pg.mod)[2]*x), lwd = 2)
polygon(c(rev(x), x), c(rev(lower.i), upper.i), border = FALSE, col = alpha("gold", 0.3))


legend("bottomleft", legend = unique(vol.dat$kingdom), pch = 16, col = cols, bg = cols, bty = "n")
legend(x = -2.6, y = 0, legend = paste(" = ", round(as.data.frame(summary(pg.mod)$tTable)[2, 1], 3)), bty = "n")
legend(x = -2.7, y = 0, legend = expression(beta), bty = "n")
legend(x = -2.7, y = -0.13, legend = paste("p = ", round(as.data.frame(summary(pg.mod)$tTable)[2, 4], 3)), bty = "n")
mtext("A", side = 2, at = 1.6, line = 2.5, las = 1, font = 2)


## Doubling and optimum temp

mod.fg1 <- lm(log10(doubling_h) ~ d1_up, data = tmp.op.dat.growth)

plot(log10(doubling_h) ~ d1_up, data = tmp.op.dat.growth, ylab = expression(paste("Doubling ", log[10], sep = " ")(h)), xlab = expression(paste("Cell diameter")~log[10]~(mu*m)), las = 1, pch = 21, bg = alpha("black", 0.3), cex = 1.2, type = "n")

grid(nx = NULL, ny = NULL, col = alpha("lightgray", 0.5), lwd = 1, lty = 2)

par(new = TRUE)

cols3 <- c("purple", "orange")[as.numeric(as.factor(tmp.op.dat.growth$kingdom))]

plot(log10(doubling_h) ~ d1_up, data = tmp.op.dat.growth, xlab = "", ylab = "", las = 1, pch = 21, col = cols3, bg = cols3, cex = 1, axes = FALSE)

legend("bottomleft", legend = levels(as.factor((tmp.op.dat.growth$kingdom))), pch = 16, col = c("purple", "orange"), bg = c("purple", "orange"), bty = "n", cex = 1)

mtext("B", side = 2, at = 2.35, line = 2.9, las = 1, font = 2)




Model diagnosis




mod.fg1 <- lm(log10(doubling_h) ~ d1_up, data = tmp.op.dat.growth)
summary(mod.fg1)
> 
> Call:
> lm(formula = log10(doubling_h) ~ d1_up, data = tmp.op.dat.growth)
> 
> Residuals:
>      Min       1Q   Median       3Q      Max 
> -0.93943 -0.42732  0.02871  0.46205  1.36485 
> 
> Coefficients:
>             Estimate Std. Error t value Pr(>|t|)    
> (Intercept)   1.1456     0.1689   6.782 4.86e-08 ***
> d1_up        -0.1691     0.1392  -1.214    0.232    
> ---
> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> 
> Residual standard error: 0.6067 on 38 degrees of freedom
> Multiple R-squared:  0.03737, Adjusted R-squared:  0.01203 
> F-statistic: 1.475 on 1 and 38 DF,  p-value: 0.2321
layout(matrix(c(1, 1, 2, 2,
                1, 1, 2, 2,
                3, 3, 4, 4,
                3, 3, 4, 4), nrow = 4, ncol = 4, byrow = TRUE))


par(las = 1)
plot(mod.fg1)




Figure 3




## Translation machinery

tRNA <- aggregate(spp.tRNA$tRNA_genes, by = list(spp.tRNA$species), mean, na.action = na.rm)
rRNA <- aggregate(spp.rRNA$rRNA16S_genes, by = list(spp.rRNA$species), mean, na.action = na.rm)
cell.vol <- aggregate(vol$volume, by = list(vol$species), mean)
d1_up <- aggregate(spp.d1_up$d1_up, by = list(spp.d1_up$species), mean, na.action = na.rm)
doubling <- aggregate(spp.doubling$doubling_h, by = list(spp.doubling$species), mean, na.action = na.rm)


#dim(tRNA)
names(tRNA) <- c("species", "tRNA")
#dim(rRNA)
names(rRNA) <- c("species", "rRNA")
#dim(cell.vol)
names(cell.vol) <- c("species", "volume")
#dim(d1_up)
names(d1_up) <- c("species", "d1_up")
#dim(doubling)
names(doubling) <- c("species", "doubling_h")


genes <- merge(rRNA, tRNA, by = "species")
tran <- merge(genes, cell.vol, by = "species")
tran2 <- merge(genes, d1_up, by = "species")
tran3 <- merge(genes, doubling, by = "species")

obj <- rep()
for(i in tran$species){
    kingdom <- unique(data$superkingdom[data$species == i])
    obj <- c(obj, kingdom)
}

tran$kingdom <- obj
#head(tran)

tran$species <- gsub("[[:punct:]]", "", tran$species)
tran$species <- gsub(" ", "_", tran$species)
#head(tran)

tran <- tran[!tran$species == "Sphingopyxis_alaskensis", ] ## possible outlier
rownames(tran) <- tran$species

tran.tree <- read.tree("tran.spp.nwk")
#tran.tree <- force.ultrametric(tran.tree)

check <- name.check(tran.tree, tran)
rm_phy <- check$tree_not_data
rm_dat <- check$data_not_tree
tree.tran <- drop.tip(tran.tree, rm_phy)

tran.dat <- subset(tran, subset = tran$species %in% tree.tran$tip, select = names(tran))
#name.check(tree.tran, tran.dat)
#str(tran.dat)


obj <- rep()
for(i in tran2$species){
    kingdom <- unique(data$superkingdom[data$species == i])
    obj <- c(obj, kingdom)
}

tran2$kingdom <- obj
#head(tran)

tran2$species <- gsub("[[:punct:]]", "", tran2$species)
tran2$species <- gsub(" ", "_", tran2$species)
rownames(tran2) <- tran2$species
#head(tran2)

check <- name.check(tree, tran2)
rm_phy <- check$tree_not_data
rm_dat <- check$data_not_tree
tree.tran2 <- drop.tip(tree, rm_phy)

tran.dat2 <- subset(tran2, subset = tran2$species %in% tree$tip, select = names(tran2))
#name.check(tree.tran2, tran.dat2)
#str(tran.dat2)


obj <- rep()
for(i in tran3$species){
    kingdom <- unique(data$superkingdom[data$species == i])
    obj <- c(obj, kingdom)
}

tran3$kingdom <- obj
#head(tran)

tran3$species <- gsub("[[:punct:]]", "", tran3$species)
tran3$species <- gsub(" ", "_", tran3$species)
rownames(tran3) <- tran3$species
#head(tran3)

check <- name.check(tree, tran3)
rm_phy <- check$tree_not_data
rm_dat <- check$data_not_tree
tree.tran3 <- drop.tip(tree, rm_phy)

tran.dat3 <- subset(tran3, subset = tran3$species %in% tree.tran3$tip, select = names(tran3))
##str(tran.dat3)

mod.gr1 <- lm(log10(rRNA) ~ log10(doubling_h), data = tran3)
#summary(mod.gr1)

mod.gr2 <- lm(log10(tRNA) ~ log10(doubling_h), data = tran3)
#summary(mod.gr2)


## GROWTH


#png("figure2.png", height = 7, width = 7, units = "in", res = 360)


SSX <- sum(round((log10(tran3$doubling_h) - mean(log10(tran3$doubling_h)))^2), 2)
s2 <- var(log10(tran3$rRNA))
n <- length(tran3$rRNA)
x <- seq(min(log10(tran3$doubling_h)), max(log10(tran3$doubling_h)), length = length(tran3$species))
m.x <- mean(round(log(tran3$doubling_h), 1))
se <- sqrt(s2*((1/n) + (((x - m.x)^2)/SSX)))
is <- qt(0.975, df = n - 2)
ii <- qt(0.025, df = n - 2)
ic.s <- se*is
ic.i <- se*ii
upper.i <- (coef(mod.gr1)[1] + coef(mod.gr1)[2]*x) + ic.s
lower.i <- (coef(mod.gr1)[1] + coef(mod.gr1)[2]*x) + ic.i


cols2 <- setNames(c("purple", "orange"), levels(as.factor(tran3$kingdom)))

par(mar = c(5, 4, 1.5, 3.3))

plot(log10(rRNA) ~ log10(doubling_h), data = tran3, type = "n", pch = 16, las = 1, ylab = expression(log[10]~rRNA~genes), xlab = expression(paste("Doubling ", log[10], sep = " ")(h)))

grid(nx = NULL, ny = NULL, col = alpha("lightgray", 0.5), lwd = 1, lty = 2)

par(new = TRUE)

plot(log10(rRNA) ~ log10(doubling_h), data = tran3, type = "p", pch = 16, col = cols2[tran3$kingdom], bg = cols2[tran3$kingdom], las = 1, axes = FALSE, xaxt = "n", ylab = "", xlab = "")

lines(x = x, y = (coef(mod.gr1)[1] + coef(mod.gr1)[2]*x), lwd = 2)
polygon(c(rev(x), x), c(rev(lower.i), upper.i), border = FALSE, col = alpha("gold", 0.3))


SSX <- sum(round((log10(tran3$doubling_h) - mean(log10(tran3$doubling_h)))^2), 2)
s2 <- var(log10(tran3$tRNA))
n <- length(tran3$tRNA)
x <- seq(min(log10(tran3$doubling_h)), max(log10(tran3$doubling_h)), length = length(tran3$species))
m.x <- mean(round(log(tran3$doubling_h), 1))
se <- sqrt(s2*((1/n) + (((x - m.x)^2)/SSX)))
is <- qt(0.975, df = n - 2)
ii <- qt(0.025, df = n - 2)
ic.s <- se*is
ic.i <- se*ii
upper.i <- (coef(mod.gr2)[1] + coef(mod.gr2)[2]*x) + ic.s
lower.i <- (coef(mod.gr2)[1] + coef(mod.gr2)[2]*x) + ic.i

cols2 <- setNames(c("purple", "orange"), levels(as.factor(tran3$kingdom)))

#plot(log10(tRNA) ~ log10(doubling_h), data = tran3, type = "n", pch = 16, las = 1, ylab = expression(log[10]~tRNA~genes), xlab = expression(paste("Doubling ", log[10], sep = " ")(h)))

#grid(nx = NULL, ny = NULL, col = alpha("lightgray", 0.5), lwd = 1, lty = 2)

par(new = TRUE)

plot(log10(tRNA) ~ log10(doubling_h), data = tran3, type = "p", pch = 8, col = cols2[tran3$kingdom], bg = cols2[tran3$kingdom], las = 1, axes = FALSE, xaxt = "n", ylab = "", xlab = "")

lines(x = x, y = (coef(mod.gr2)[1] + coef(mod.gr2)[2]*x), lwd = 2, lty = 2)
polygon(c(rev(x), x), c(rev(lower.i), upper.i), border = FALSE, col = alpha("gold", 0.3))

axis(side = 4, at = pretty(range(log10(tran3$tRNA))), las = 1)
mtext(expression(log[10]~tRNA~genes), side = 4, line = 2.3)

legend("topright", legend = c("Archaea", "Bacteria"), pch = 16, col = c("purple", "orange"), bg = c("purple", "orange"), bty = "n", cex = 1)
legend(x = 1.7, y = 2.18, legend = "tRNA genes", lty = 2, lwd = 2, bty = "n")

#dev.off()




Model diagnosis




mod.gr1 <- lm(log10(rRNA) ~ log10(doubling_h), data = tran3)
summary(mod.gr1)
> 
> Call:
> lm(formula = log10(rRNA) ~ log10(doubling_h), data = tran3)
> 
> Residuals:
>      Min       1Q   Median       3Q      Max 
> -0.62801 -0.22687  0.01494  0.21141  0.78185 
> 
> Coefficients:
>                   Estimate Std. Error t value Pr(>|t|)    
> (Intercept)        0.53444    0.01867  28.626   <2e-16 ***
> log10(doubling_h) -0.22266    0.02465  -9.032   <2e-16 ***
> ---
> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> 
> Residual standard error: 0.3017 on 411 degrees of freedom
> Multiple R-squared:  0.1656,  Adjusted R-squared:  0.1636 
> F-statistic: 81.57 on 1 and 411 DF,  p-value: < 2.2e-16
layout(matrix(c(1, 1, 2, 2,
                1, 1, 2, 2,
                3, 3, 4, 4,
                3, 3, 4, 4), nrow = 4, ncol = 4, byrow = TRUE))


par(las = 1)
plot(mod.gr1)

mod.gr2 <- lm(log10(tRNA) ~ log10(doubling_h), data = tran3)
summary(mod.gr2)
> 
> Call:
> lm(formula = log10(tRNA) ~ log10(doubling_h), data = tran3)
> 
> Residuals:
>      Min       1Q   Median       3Q      Max 
> -0.29963 -0.07310 -0.01229  0.04997  0.48411 
> 
> Coefficients:
>                    Estimate Std. Error t value Pr(>|t|)    
> (Intercept)        1.762027   0.007317 240.823  < 2e-16 ***
> log10(doubling_h) -0.079203   0.009662  -8.198 3.15e-15 ***
> ---
> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> 
> Residual standard error: 0.1182 on 411 degrees of freedom
> Multiple R-squared:  0.1405,  Adjusted R-squared:  0.1384 
> F-statistic:  67.2 on 1 and 411 DF,  p-value: 3.154e-15
layout(matrix(c(1, 1, 2, 2,
                1, 1, 2, 2,
                3, 3, 4, 4,
                3, 3, 4, 4), nrow = 4, ncol = 4, byrow = TRUE))


par(las = 1)
plot(mod.gr2)




Figure 5




obj <- rep()
for(i in d1_up$species){
    kingdom <- unique(data$superkingdom[data$species == i])
    obj <- c(obj, kingdom)
}

d1_up$kingdom <- obj
head(d1_up)
>                             species d1_up  kingdom
> 1           [Clostridium] aldenense   1.1 Bacteria
> 2           [Clostridium] caenicola   0.6 Bacteria
> 3          [Clostridium] fimetarium   0.6 Bacteria
> 4           [Clostridium] lavalense   1.5 Bacteria
> 5           [Clostridium] paradoxum   1.1 Bacteria
> 6 [Clostridium] polysaccharolyticum   1.1 Bacteria
d1_up$species <- gsub("[[:punct:]]", "", d1_up$species)
d1_up$species <- gsub(" ", "_", d1_up$species)
rownames(d1_up) <- d1_up$species
dim(d1_up)
> [1] 1603    3
head(d1_up)
>                                                         species d1_up  kingdom
> Clostridium_aldenense                     Clostridium_aldenense   1.1 Bacteria
> Clostridium_caenicola                     Clostridium_caenicola   0.6 Bacteria
> Clostridium_fimetarium                   Clostridium_fimetarium   0.6 Bacteria
> Clostridium_lavalense                     Clostridium_lavalense   1.5 Bacteria
> Clostridium_paradoxum                     Clostridium_paradoxum   1.1 Bacteria
> Clostridium_polysaccharolyticum Clostridium_polysaccharolyticum   1.1 Bacteria
d1_up <- d1_up[d1_up$d1_up < 6, ]

size <- lm(d1_up ~ kingdom, data = d1_up)
summary(size)
> 
> Call:
> lm(formula = d1_up ~ kingdom, data = d1_up)
> 
> Residuals:
>     Min      1Q  Median      3Q     Max 
> -1.1004 -0.2783 -0.0783  0.1217  4.6217 
> 
> Coefficients:
>                 Estimate Std. Error t value Pr(>|t|)    
> (Intercept)      1.25044    0.04059  30.803   <2e-16 ***
> kingdomBacteria -0.37217    0.04339  -8.577   <2e-16 ***
> ---
> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> 
> Residual standard error: 0.5727 on 1594 degrees of freedom
> Multiple R-squared:  0.04412, Adjusted R-squared:  0.04352 
> F-statistic: 73.57 on 1 and 1594 DF,  p-value: < 2.2e-16
obj <- rep()
for(i in doubling$species){
    kingdom <- unique(data$superkingdom[data$species == i])
    obj <- c(obj, kingdom)
}

doubling$kingdom <- obj
head(doubling)
>                              species doubling_h  kingdom
> 1              [Bacillus] caldotenax       0.24 Bacteria
> 2 [Butyribacterium] methylotrophicum      20.00 Bacteria
> 3      [Clostridium] alkalicellulosi      14.00 Bacteria
> 4            [Clostridium] paradoxum       0.67 Bacteria
> 5         [Clostridium] stercorarium       8.60 Bacteria
> 6           [Clostridium] termitidis       9.62 Bacteria
doubling$species <- gsub("[[:punct:]]", "", doubling$species)
doubling$species <- gsub(" ", "_", doubling$species)
rownames(doubling) <- doubling$species
dim(doubling)
> [1] 928   3
head(doubling)
>                                                           species doubling_h
> Bacillus_caldotenax                           Bacillus_caldotenax       0.24
> Butyribacterium_methylotrophicum Butyribacterium_methylotrophicum      20.00
> Clostridium_alkalicellulosi           Clostridium_alkalicellulosi      14.00
> Clostridium_paradoxum                       Clostridium_paradoxum       0.67
> Clostridium_stercorarium                 Clostridium_stercorarium       8.60
> Clostridium_termitidis                     Clostridium_termitidis       9.62
>                                   kingdom
> Bacillus_caldotenax              Bacteria
> Butyribacterium_methylotrophicum Bacteria
> Clostridium_alkalicellulosi      Bacteria
> Clostridium_paradoxum            Bacteria
> Clostridium_stercorarium         Bacteria
> Clostridium_termitidis           Bacteria
growth <- lm(doubling_h ~ kingdom, data = doubling)
summary(growth)
> 
> Call:
> lm(formula = doubling_h ~ kingdom, data = doubling)
> 
> Residuals:
>    Min     1Q Median     3Q    Max 
> -13.64 -10.87  -8.89  -1.36 421.12 
> 
> Coefficients:
>                 Estimate Std. Error t value Pr(>|t|)    
> (Intercept)       13.867      2.071   6.695 3.75e-11 ***
> kingdomBacteria   -1.981      2.380  -0.832    0.405    
> ---
> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> 
> Residual standard error: 31.07 on 926 degrees of freedom
> Multiple R-squared:  0.0007477,   Adjusted R-squared:  -0.0003314 
> F-statistic: 0.6929 on 1 and 926 DF,  p-value: 0.4054
##png("figure5.png", height = 7, width = 7, units = "in", res = 360)

layout(matrix(c(0, 0, 0, 0,
                 1, 1, 2, 2,
                 1, 1, 2, 2,
                 0, 0, 0, 0), nrow = 4, ncol = 4, byrow = TRUE))

## Basic boxplot

vioplot(log10(doubling_h) ~ kingdom, data = doubling, border = NA, method = "jitter", side = "right", ylab = expression(paste("Doubling")~log[10]~(h)), xlab = "Kingdom", col = "white", las = 1)

grid(nx = NULL, ny = NULL, col = alpha("lightgray", 0.5), lwd = 1, lty = 2)

par(new = TRUE)

vioplot(log10(doubling_h) ~ kingdom, data = doubling, border = NA, method = "jitter", side = "right", ylab = "", xlab = "", col = c(alpha("purple", 0.2), alpha("orange", 0.2)), las = 1)

segments(x0 = 1, y0 = mean(log10(doubling$doubling_h)[doubling$kingdom == "Archaea"]), x1 = 1.4, y1 = mean(log10(doubling$doubling_h)[doubling$kingdom == "Archaea"]), lwd = 2, lty = 2, col = "black")
text(x = 1.45, y = mean(log10(doubling$doubling_h)[doubling$kingdom == "Archaea"]), expression(mu))

segments(x0 = 2, y0 = mean(log10(doubling$doubling_h)[doubling$kingdom == "Bacteria"]), x1 = 2.39, y1 = mean(log10(doubling$doubling_h)[doubling$kingdom == "Bacteria"]), lwd = 2, lty = 2, col = "black")
text(x = 2.45, y = mean(log10(doubling$doubling_h)[doubling$kingdom == "Bacteria"]), expression(mu))

text(x = 0.65, y = 2.5, paste("n =", length(doubling$species), sep = " "), cex = 1.1)

#mean(doubling$doubling[doubling$kingdom == "Archaea"])
#mean(doubling$doubling[doubling$kingdom == "Bacteria"])

stripchart(log10(doubling_h) ~ kingdom, vertical = TRUE, data = doubling, method = "jitter", add = TRUE, pch = 20, col = c(alpha("purple", 0.3), alpha("orange", 0.5)), cex = 1.3)

mtext("A", side = 2, at = 3, line = 3, las = 1, font = 2)


## Basic boxplot

vioplot(d1_up ~ kingdom, data = d1_up, border = NA, method = "jitter", side = "right", ylab = expression(paste("Cell size")~log[10]~(mu*m)), xlab = "Kingdom", col = "white", las = 1)

grid(nx = NULL, ny = NULL, col = alpha("lightgray", 0.5), lwd = 1, lty = 2)

par(new = TRUE)

vioplot(d1_up ~ kingdom, data = d1_up, border = NA, method = "jitter", side = "right", ylab = "", xlab = "", col = c(alpha("purple", 0.2), alpha("orange", 0.2)), las = 1)

segments(x0 = 1, y0 = mean(d1_up$d1_up[d1_up$kingdom == "Archaea"]), x1 = 1.311, y1 = mean(d1_up$d1_up[d1_up$kingdom == "Archaea"]), lwd = 2, lty = 2, col = "black")
text(x = 1.4, y = mean(d1_up$d1_up[d1_up$kingdom == "Archaea"]), expression(mu))

segments(x0 = 2, y0 = mean(d1_up$d1_up[d1_up$kingdom == "Bacteria"]), x1 = 2.4, y1 = mean(d1_up$d1_up[d1_up$kingdom == "Bacteria"]), lwd = 2, lty = 2, col = "black")
text(x = 2.45, y = mean(d1_up$d1_up[d1_up$kingdom == "Bacteria"]), expression(mu))

text(x = 0.65, y = 5.3, paste("n =", length(d1_up$species), sep = " "), cex = 1.1)

#mean(d1_up$d1_up[d1_up$kingdom == "Archaea"])
#mean(d1_up$d1_up[d1_up$kingdom == "Bacteria"])

stripchart(d1_up ~ kingdom, vertical = TRUE, data = d1_up, method = "jitter", add = TRUE, pch = 20, col = c(alpha("purple", 0.3), alpha("orange", 0.5)), cex = 1.3)

mtext("B", side = 2, at = 6, line = 3, las = 1, font = 2)

#dev.off()




Model diagnosis




size <- lm(d1_up ~ kingdom, data = d1_up)
summary(size)
> 
> Call:
> lm(formula = d1_up ~ kingdom, data = d1_up)
> 
> Residuals:
>     Min      1Q  Median      3Q     Max 
> -1.1004 -0.2783 -0.0783  0.1217  4.6217 
> 
> Coefficients:
>                 Estimate Std. Error t value Pr(>|t|)    
> (Intercept)      1.25044    0.04059  30.803   <2e-16 ***
> kingdomBacteria -0.37217    0.04339  -8.577   <2e-16 ***
> ---
> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> 
> Residual standard error: 0.5727 on 1594 degrees of freedom
> Multiple R-squared:  0.04412, Adjusted R-squared:  0.04352 
> F-statistic: 73.57 on 1 and 1594 DF,  p-value: < 2.2e-16
layout(matrix(c(1, 1, 2, 2,
                1, 1, 2, 2,
                3, 3, 4, 4,
                3, 3, 4, 4), nrow = 4, ncol = 4, byrow = TRUE))


par(las = 1)
plot(size)

growth <- lm(doubling_h ~ kingdom, data = doubling)
summary(growth)
> 
> Call:
> lm(formula = doubling_h ~ kingdom, data = doubling)
> 
> Residuals:
>    Min     1Q Median     3Q    Max 
> -13.64 -10.87  -8.89  -1.36 421.12 
> 
> Coefficients:
>                 Estimate Std. Error t value Pr(>|t|)    
> (Intercept)       13.867      2.071   6.695 3.75e-11 ***
> kingdomBacteria   -1.981      2.380  -0.832    0.405    
> ---
> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
> 
> Residual standard error: 31.07 on 926 degrees of freedom
> Multiple R-squared:  0.0007477,   Adjusted R-squared:  -0.0003314 
> F-statistic: 0.6929 on 1 and 926 DF,  p-value: 0.4054
layout(matrix(c(1, 1, 2, 2,
                1, 1, 2, 2,
                3, 3, 4, 4,
                3, 3, 4, 4), nrow = 4, ncol = 4, byrow = TRUE))


par(las = 1)
plot(growth)




Figure S1




## Relationship between diameter and volume

d1_up <- aggregate(spp.d1_up$d1_up, by = list(spp.d1_up$species), mean, na.action = na.rm)
cell.vol <- aggregate(vol$volume, by = list(vol$species), mean)


names(d1_up) <- c("species", "d1_up")
names(cell.vol) <- c("species", "volume")


d1_up$species <- gsub("[[:punct:]]", "", d1_up$species)
d1_up$species <- gsub(" ", "_", d1_up$species)
cell.vol$species <- gsub(" ", "_", cell.vol$species)

di.vol <- merge(cell.vol, d1_up, by = "species")

p.cor <- cor.test(log10(di.vol$volume), di.vol$d1_up, method = "pearson")


##png("figure3.png", height = 7, width = 7, units = "in", res = 360)


plot(log10(volume) ~ d1_up, data = di.vol, pch = 16, las = 1, ylab = expression(paste("Cell volume ", log[10], sep = " ")(mu*m^3)), xlab = expression(paste("Cell diameter ", log[10], sep = " ")(mu*m)), mgp = c(2.6, 1, 0), type = "n")

grid(nx = NULL, ny = NULL, col = alpha("lightgray", 0.5), lwd = 1, lty = 2)

par(new = TRUE)

plot(log10(volume) ~ d1_up, data = di.vol, pch = 16, las = 1, ylab = "", xlab = "", mgp = c(2.6, 1, 0), type = "p", axes = FALSE)


abline(lm(log10(di.vol$volume) ~ di.vol$d1_up), lwd = 2)
legend(x = 0.1, y = 2.3, legend = expression("R"^2==~0.877), bty = "n")
legend(x = 0.1, y = 2.06, legend = expression(p==~0.001), bty = "n")

##dev.off()